Load data
# Load the three RDS inputs through the project helper `redo_load`, passing
# each file path (resolved with `here()`) under a name.
# NOTE(review): presumably each argument name (stores, items, prices) becomes
# the object of the same name used by the sections below; confirm against the
# definition of `redo_load`.
redo_load(
stores = here("data/stores.rds"),
items = here("data/items.rds"),
prices = here("data/prices.rds")
)
Prices of items by store
# Seed the RNG so the item and store samples are reproducible.
set.seed(2542589)

# Draw 3 item ids first, then 4 store ids; the order of the two draws matters
# because both consume the same random stream.
ck_items <- sample(items[["item_id"]], 3)
ck_stores <- sample(stores[["store_id"]], 4)

# Keep only the rows for the sampled items in the sampled stores, prefix each
# id with its column name, plot price against `d`, `store_id` and `item_id`
# with a line geom, and pass the plot to ggplotly().
prices[item_id %in% ck_items & store_id %in% ck_stores] %>%
  as_tibble() %>%
  mutate(
    item_id = paste0("item_id=", item_id),
    store_id = paste0("store_id=", store_id)
  ) %>%
  plotf(price ~ d + store_id + item_id, geom = geom_line) %>%
  ggplotly()
Prices of items by store within the same state
# Seed the RNG so the item sample is reproducible.
set.seed(2542589)
ck_items <- sample(items[["item_id"]], 3)

# Take every price row for the sampled items and left-join the store's
# `state_id` and `store_id_state` columns on `store_id` (all.x = TRUE keeps
# price rows even when no store matches). Each id is then prefixed with its
# column name before plotting price against `d`, `store_id_state`, `state_id`
# and `item_id` with a line geom; the plot is passed to ggplotly().
prices[item_id %in% ck_items] %>%
  merge(
    stores[, c("store_id", "state_id", "store_id_state")],
    by = "store_id",
    all.x = TRUE
  ) %>%
  as_tibble() %>%
  mutate(
    item_id = paste0("item_id=", item_id),
    store_id = paste0("store_id=", store_id),
    state_id = paste0("state_id=", state_id),
    store_id_state = paste0("store_id_state=", store_id_state)
  ) %>%
  plotf(price ~ d + store_id_state + state_id + item_id, geom = geom_line) %>%
  ggplotly()
Prices by department
# Seed the RNG so the weighted item sample is reproducible.
set.seed(2542589)

# Count the rows of `items` per (dept_id, cat_id) and rename the count to
# `Freq`. The merge has no `by`, so it joins on the column names the two
# tables share; all.y = TRUE keeps every row of `items`. Then draw 30 rows
# with replacement, weighted by `Freq`, and keep their item ids (duplicates
# are possible because of replace = TRUE).
ck_items <-
  items %>%
  lazy_dt() %>%
  count(dept_id, cat_id) %>%
  rename(Freq = n) %>%
  merge(items, all.y = TRUE) %>%
  sample_n(30, replace = TRUE, weight = Freq) %>%
  pull(item_id)

# Price rows for the sampled items, left-joined with each item's department
# and category.
prices2 <- merge(
  prices[item_id %in% ck_items],
  items[, c("item_id", "dept_id", "cat_id")],
  by = "item_id",
  all.x = TRUE
)

# Box plots of price by department, by category, and by both. The recorded
# warnings show that rows with a non-finite price are dropped by the boxplot
# stat in each plot.
plotf(prices2, price ~ dept_id, geom = geom_boxplot)
## Warning: Removed 109459 rows containing non-finite values (stat_boxplot).

plotf(prices2, price ~ cat_id, geom = geom_boxplot)
## Warning: Removed 109459 rows containing non-finite values (stat_boxplot).

plotf(prices2, price ~ dept_id + cat_id, geom = geom_boxplot)
## Warning: Removed 109459 rows containing non-finite values (stat_boxplot).
